/* 
    Purpose: Using the 1960 Census (5% sample), this file locates black and white men aged 
             30-50 who are fathers of a child younger than 18 in the same household. Other 
             variables necessary to create average predicted father income are also 
             cleaned.

    Creates: Census1960_5pct_fathers30to50_adjustments.dta
*/
* Session setup. The working directory is built from the global macro
* Mydirectory1, which is not defined in this file and must be set by the caller.
set more off
clear
cd "$Mydirectory1/1_DataSources/CensusData/"

* Load the raw 1960 5% extract (download from IPUMS USA); any data in memory is discarded
use "./input/Census1960_5pct_raw.dta", clear

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** SET UP INCOME VARIABLES
***************************

* Fix income variables
*   For each income variable, the all-9s code listed below is set to missing
*   and negative amounts are floored at zero. ftotinc is the Census-provided
*   family income (v1).
	local na_inctot  9999999
	local na_incwage 999999
	local na_ftotinc 9999999
	foreach v in inctot incwage ftotinc {
		replace `v' = . if `v' == `na_`v''
		replace `v' = 0 if `v' < 0
	}

* Total earnings in the family unit and in the household
*   egen total() treats missing values as zero, so these sums are never missing.
	bysort serial famunit: egen fam_earnings = total(incwage)
	bysort serial: egen hh_earnings = total(incwage)

* Family income (v2--manually constructed by summing individual income of family unit members)
	bysort serial famunit: egen fam_income = total(inctot)

* Count discrepancies between v1 and v2
    /*Note: Discrepancies appear to come
            mostly from individuals living
            in group quarters. */
	count if ftotinc == . & fam_income > 0

* Harmonize v1 and v2: fill in v1 from v2 where v1 is missing and v2 is positive
	replace ftotinc = fam_income if ftotinc == . & fam_income > 0

* Household income

    //Keep family income on one record per family unit (lowest pernum) so it is
    //counted once; a missing family income contributes zero
	bysort serial famunit (pernum): gen fam_head = (_n == 1)
	gen temp = cond(ftotinc == ., 0, ftotinc) if fam_head == 1

    //Add up incomes of "separate" families within a serial to get household income (i.e. income by serial number)
	bysort serial: egen hh_income = total(temp)
	drop temp fam_head

/*
   Convert income variables to 1950 dollars using the CPI:
   Source: https://www.minneapolisfed.org/about-us/monetary-policy/inflation-calculator/consumer-price-index-1913-

   NOTE(review): fam_income is not in the list below, so it stays in nominal
   dollars while ftotinc (which may have been filled from it) is deflated --
   confirm this is intended.
   NOTE(review): CPI1960 and CPI1950 are created as dataset variables and are
   not dropped, so they remain as constant columns -- confirm downstream use.
*/
	gen CPI1960 = 29.6
	gen CPI1950= 24.1

	foreach money of varlist incwage inctot ftotinc fam_earnings hh_earnings hh_income {
		replace `money' = `money' * (CPI1950 / CPI1960)
	}
	
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** IDENTIFY FATHERS
***************************
  
	sort serial pernum

* Count the number of fathers with children <18 in each household
	* After these two recodes poploc is non-missing only for persons under 18
	* whose father's person number is recorded.
	replace poploc=. if poploc==0 //don't count people w/o father in house
	replace poploc=. if age>=18

	gen father_inHH = poploc<.
	egen tag = tag(serial poploc) //flags one record per distinct (serial, poploc) pair; missing poploc is never flagged
	egen number_fathers = total(tag), by(serial)
	replace number_fathers=. if number_fathers==0
	label var number_fathers "Number of fathers in household"
	tab number_fathers, m

* Tag each father in a household
	gen pop_test = poploc //person number of father
	gen father =.

	* Each pass picks out the lowest remaining father person number in every
	* household, so the number of passes must equal the LARGEST father count
	* in any household. Looping over the distinct levels of number_fathers
	* instead would run too few passes whenever a count is skipped
	* (e.g. levels 1, 2 and 4 give three passes, missing a fourth father).
	summarize number_fathers, meanonly
	local max_fathers = cond(r(N) > 0, r(max), 0) //0 passes when no household has a father
	forvalues x = 1/`max_fathers' {
		* bysort (rather than by) so the loop does not depend on the sort
		* order surviving the egen calls above
		bysort serial (pernum): egen father_`x' = min(pop_test) //Assign person number of father with lowest person number
		replace pop_test =. if father_`x' == pop_test //Move on to next father in household
		replace father=1 if father_`x' == pernum
	}
	drop pop_test

* Keep fathers
	keep if father==1
	label var father "Father with child under 18 in household"

* Keep black and white fathers aged 30-50
	keep if age>=30 & age<=50
	keep if race==1 | race==2
	tab sex //check on the sex composition of the retained sample
	
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

* Region of current residence 
    * Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont, New Jersey, New York, Pennsylvania
    * Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, South Dakota
    /* South: Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina, South Carolina, Virginia, West Virginia, Alabama,
              Kentucky, Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas */
    * West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming, California, Oregon, Washington --note: Census puts AK and HI in with Pacific division
    
	* Collapse the detailed region codes into four groups; any code not listed
	* below (including missing) is left as missing.
	gen region_merge = cond(inlist(region, 11, 12), 1, /// Northeast
	                   cond(inlist(region, 21, 22), 2, /// Midwest
	                   cond(inrange(region, 31, 33), 3, /// South
	                   cond(inlist(region, 41, 42), 4, .)))) // West
	tab region, m
	tab region_merge, m

	* Attach value labels and tabulate again to check the labelled result
	label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
	label values region_merge region_l
	tab region_merge, m

	* South indicator: 1 for the South group, 0 otherwise (0 also when region is unclassified)
	gen south_merge = (region_merge == 3)
	
* Education
*   Five-category attainment built from educd:
*     1 = <grade school, 2 = 8th grade, 3 = <hs, 4 = hs, 5 = >hs
*   "999" is missing; it and any code not matched below leave edu missing.
	gen edu = cond(educd <= 25, 1, ///
	          cond(educd == 26, 2, ///
	          cond(inlist(educd, 30, 40, 50), 3, ///
	          cond(educd == 60, 4, ///
	          cond(educd >= 65 & educd < 999, 5, .)))))
	tab edu, m
	
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*
	
****************
**** SAVE 
****************

	* Shrink storage types where possible, then write the analysis file,
	* overwriting any existing copy
	compress
	save "./input/Census1960_5pct_fathers30to50_adjustments.dta", replace
	
